Using the function from earlier that converts a depth map into a 3D point cloud, we can implement the Iterative Closest Point (ICP) algorithm. ICP estimates the 3D transformation between two point clouds (a rotation matrix and a translation vector), which in turn gives us the relative rotation and translation between the two cameras.
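Before looking at the code, it helps to state what ICP is minimizing. In standard notation (the symbols below are generic and do not refer to variables in the code), the rigid alignment objective is:

\[
R^{*}, t^{*} \;=\; \arg\min_{R \in SO(3),\; t \in \mathbb{R}^{3}} \sum_{i=1}^{N} \bigl\lVert R\,p_i + t - q_i \bigr\rVert^{2}
\]

where each q_i is the current nearest neighbor of the source point p_i in the target cloud; ICP alternates between re-estimating these correspondences and solving this least-squares problem.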
def convert_point_cloud(rgb, depth, K):
    H, W = depth.shape
    fx = K[0, 0]
    fy = K[1, 1]
    cx = K[0, 2]
    cy = K[1, 2]
    x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy")
    x = x.flatten()
    y = y.flatten()
    # Get the values
    rgb = rgb[y, x, :]
    depth = depth[y, x]
    # Remove invalid depth values (depth == 0)
    mask = depth > 0
    x = x[mask]
    y = y[mask]
    depth = depth[mask]
    rgb = rgb[mask, :]
    # Forward project: x = (X * fx / Z) + cx
    # Inverse project: X = (x - cx) * depth / fx
    X = (x - cx) * depth / fx
    Y = (y - cy) * depth / fy
    Z = depth
    points = np.stack([X, Y, Z], axis=-1)
    return points, rgb
def main():
    dataset = TUMRGBD("data/rgbd_dataset_freiburg2_desk")
    frames = []
    # Get the first two valid frames
    for i in range(0, len(dataset), 50):
        x = dataset[i]
        if x is None:
            continue
        frames.append(x)
        if len(frames) == 2:
            break
    K = dataset.intrinsic_matrix()

    rgb1 = cv2.imread(frames[0]["rgb_path"])
    rgb1 = cv2.cvtColor(rgb1, cv2.COLOR_BGR2RGB)
    depth1 = cv2.imread(frames[0]["depth_path"], cv2.IMREAD_UNCHANGED)
    depth1 = depth1.astype(np.float32) / 5000.0
    camera_to_world1 = np.eye(4)
    camera_to_world1[:3, :3] = frames[0]["rotation"]
    camera_to_world1[:3, 3] = frames[0]["translation"]

    rgb2 = cv2.imread(frames[1]["rgb_path"])
    rgb2 = cv2.cvtColor(rgb2, cv2.COLOR_BGR2RGB)
    depth2 = cv2.imread(frames[1]["depth_path"], cv2.IMREAD_UNCHANGED)
    depth2 = depth2.astype(np.float32) / 5000.0
    camera_to_world2 = np.eye(4)
    camera_to_world2[:3, :3] = frames[1]["rotation"]
    camera_to_world2[:3, 3] = frames[1]["translation"]

    points1, colors1 = convert_point_cloud(rgb1, depth1, K)
    points2, colors2 = convert_point_cloud(rgb2, depth2, K)
    # Subsample the number of points
    points1 = points1[::10, :]
    colors1 = colors1[::10]
    points2 = points2[::10, :]
    colors2 = colors2[::10]

    # Add opacity for visualization
    rgb1 = cv2.cvtColor(rgb1, cv2.COLOR_RGB2RGBA)
    rgb1[:, :, 3] = 128
    rgb2 = cv2.cvtColor(rgb2, cv2.COLOR_RGB2RGBA)
    rgb2[:, :, 3] = 128
    create_frustum_with_image(
        rgb1,
        camera_to_world=camera_to_world1,
        color="blue",
        axis=True,
    )
    create_frustum_with_image(
        rgb2,
        camera_to_world=camera_to_world2,
        color="red",
        axis=True,
    )

    # Apply the transformation of the first frame to both point clouds
    points1 = (camera_to_world1[:3, :3] @ points1.T + camera_to_world1[:3, 3][:, None]).T
    points2 = (camera_to_world1[:3, :3] @ points2.T + camera_to_world1[:3, 3][:, None]).T
    create_point_cloud(points1, colors1, parent=view.scene)
    create_point_cloud(points2, colors2, parent=view.scene)
Although the camera frustums are drawn using the known (ground-truth) camera poses, for the point clouds we pretend the transformation between the two cameras is unknown, so both points1 and points2 are transformed with the first camera's pose. This produces the following result:

As you can see, the two point clouds are not aligned, so we need ICP to estimate the transformation between them.
The first step is to find, for each point in one cloud, its nearest point in the other cloud. The simplest approach is a pair of nested for loops, but that has O(n^2) time complexity, which is far too slow once there are many points. Instead, we can use a spatial data structure to speed up the search; here we use a KD-Tree.
def nearest_neighbor(src, dst):
    """
    Finds the nearest (Euclidean) neighbor in dst for each point in src using a KDTree.
    Returns the distances and indices.
    """
    tree = KDTree(dst)
    distances, indices = tree.query(src, k=1)
    return distances, indices
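To convince yourself that the KD-Tree gives the same answer as the naive nested-loop search, a small comparison could look like this (a sketch; nearest_neighbor_bruteforce is a hypothetical helper written only for this check, and nearest_neighbor is the function defined above):

import numpy as np
from scipy.spatial import KDTree

def nearest_neighbor_bruteforce(src, dst):
    # Naive O(n^2) nearest neighbor: compute all pairwise squared distances.
    diff = src[:, None, :] - dst[None, :, :]        # shape (N, M, 3)
    d2 = np.sum(diff ** 2, axis=-1)                 # shape (N, M)
    indices = np.argmin(d2, axis=1)
    distances = np.sqrt(d2[np.arange(len(src)), indices])
    return distances, indices

rng = np.random.default_rng(0)
src = rng.random((500, 3))
dst = rng.random((800, 3))

d_kd, i_kd = nearest_neighbor(src, dst)             # KD-Tree version above
d_bf, i_bf = nearest_neighbor_bruteforce(src, dst)
assert np.array_equal(i_kd, i_bf)
assert np.allclose(d_kd, d_bf)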
The core of the ICP algorithm, estimating the rotation and translation, is:
def best_fit_transform(A, B):
    """
    Calculates the least-squares best-fit transform that maps points A to points B in 3D.
    Returns the rotation matrix R and the translation vector t.
    """
    # Compute the centroids of the point clouds
    centroid_A = np.mean(A, axis=0)
    centroid_B = np.mean(B, axis=0)
    # Center the points by subtracting the centroids
    AA = A - centroid_A
    BB = B - centroid_B
    # Compute the covariance matrix H
    H = np.dot(AA.T, BB)
    # Singular Value Decomposition
    U, S, Vt = np.linalg.svd(H)
    # Compute the rotation matrix
    R = np.dot(Vt.T, U.T)
    # Handle the reflection case so that R is a proper rotation (det(R) = +1)
    if np.linalg.det(R) < 0:
        Vt[-1, :] *= -1
        R = np.dot(Vt.T, U.T)
    # Compute the translation vector
    t = centroid_B.T - np.dot(R, centroid_A.T)
    return R, t
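A quick way to verify best_fit_transform is to apply a known rotation and translation to some random points and check that they are recovered (a minimal sketch, assuming scipy is available for building the ground-truth rotation):

import numpy as np
from scipy.spatial.transform import Rotation

rng = np.random.default_rng(0)
A = rng.random((100, 3))

# Known ground-truth rotation and translation
R_true = Rotation.from_euler("xyz", [10, -20, 30], degrees=True).as_matrix()
t_true = np.array([0.5, -0.2, 1.0])
B = (R_true @ A.T).T + t_true

R_est, t_est = best_fit_transform(A, B)
assert np.allclose(R_est, R_true, atol=1e-6)
assert np.allclose(t_est, t_true, atol=1e-6)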
Next comes the main ICP loop: each iteration finds the nearest-neighbor correspondences and computes the transformation for that step, repeating until convergence:
def icp(source_points, target_points, max_iterations=50, tolerance=1e-6):
    """
    Iterative Closest Point (ICP) algorithm to align source_points to target_points.

    Args:
        source_points: (N, 3) numpy array of source points.
        target_points: (M, 3) numpy array of target points.
        max_iterations: Maximum number of iterations to run ICP.
        tolerance: Convergence criterion for the alignment.

    Returns:
        transformation: (4, 4) homogeneous transformation matrix.
        distances: Final distances between the aligned points.
    """
    # Represent the source points in homogeneous coordinates
    src_homogeneous = np.ones((source_points.shape[0], 4))
    src_homogeneous[:, 0:3] = source_points
    # Start with the identity matrix
    transformation = np.eye(4)
    for i in range(max_iterations):
        # Find the nearest neighbors between current source and target points
        distances, indices = nearest_neighbor(src_homogeneous[:, 0:3], target_points)
        # Check for convergence (based on distance)
        mean_error = np.mean(distances)
        print(f"Iteration {i:03d}: Mean error: {mean_error:.3f}")
        if mean_error < tolerance:
            break
        # Estimate the best transformation for the current correspondences
        R, t = best_fit_transform(src_homogeneous[:, 0:3], target_points[indices])
        # Update the transformation matrix
        T = np.eye(4)
        T[0:3, 0:3] = R
        T[0:3, 3] = t
        # Apply this transformation to the source points
        src_homogeneous = (T @ src_homogeneous.T).T
        # Update the total transformation
        transformation = T @ transformation
    return transformation, distances
In each iteration we compute the mean nearest-neighbor distance between the two point clouds and use it as the convergence criterion: if it drops below the chosen tolerance, the loop stops; otherwise it runs until max_iterations is reached. Each iteration only yields the transformation for that step, so transformation = T @ transformation accumulates it into the total transformation found so far.
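Before running it on the real frames, it can be reassuring to test icp on a synthetic rigid motion where the answer is known (a sketch reusing the icp, nearest_neighbor, and best_fit_transform functions above; for a small motion like this, ICP should converge to a near-zero residual):

import numpy as np
from scipy.spatial.transform import Rotation

rng = np.random.default_rng(42)
target = rng.random((2000, 3))

# Create a source cloud by applying a known small rigid motion to the target
R_true = Rotation.from_euler("xyz", [5, -3, 8], degrees=True).as_matrix()
t_true = np.array([0.10, 0.05, -0.08])
source = (R_true @ target.T).T + t_true

# ICP should estimate (approximately) the inverse of that motion
T_est, dist = icp(source, target, max_iterations=100)
aligned = (T_est[:3, :3] @ source.T + T_est[:3, 3][:, None]).T
print("Mean residual:", np.mean(np.linalg.norm(aligned - target, axis=1)))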
Finally, run this function and visualize the result:
T, dist = icp(points1, points2, max_iterations=100)
R = T[:3, :3]
t = T[:3, 3]
points1 = (R @ points1.T + t[:, None]).T
create_point_cloud(points1, colors1, parent=view.scene)
create_point_cloud(points2, colors2, parent=view.scene)
Note that this formulation estimates how to transform the first point cloud into the second point cloud's coordinate frame, so for visualization we transform the first point cloud into the second one's frame, which lets us see the two point clouds aligned:
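If you instead want to bring the second point cloud into the first one's coordinate frame, you can invert the estimated rigid transformation. A minimal sketch using the standard rigid-transform inverse, reusing R, t, T, and points2 from the snippet above (not part of the original code):

# Inverse of a rigid transform [R | t]: R_inv = R^T, t_inv = -R^T t
R_inv = R.T
t_inv = -R.T @ t
points2_in_frame1 = (R_inv @ points2.T + t_inv[:, None]).T

# Equivalently, invert the full 4x4 matrix
T_inv = np.linalg.inv(T)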